import pandas as pd
import numpy as np
import re
# Literal substitutions applied, in this order, to every lower-cased label.
_SECTOR_LABEL_FIXES = (
    ('&', 'and'),
    (',', ''),
    ('contruction', 'construction'),
    ('home building', 'homebuilding'),
)

# A label that mentions one of these activities collapses to the bare keyword.
_SECTOR_LABEL_COLLAPSES = (
    (re.compile('.*mining.*'), 'mining'),
    (re.compile('.*transportation.*'), 'transportation'),
)


def _normalize_sector_label(value):
    """Return the cleaned, lower-case form of one raw sector label."""
    label = str(value).lower()
    for old, new in _SECTOR_LABEL_FIXES:
        label = label.replace(old, new)
    for pattern, keyword in _SECTOR_LABEL_COLLAPSES:
        label = pattern.sub(keyword, label)
    return label


def get_sector_map_step1(ds_):
    """Add a cleaned sector label and a broad sector to a copy of ``ds_``.

    For each row the label is taken from ``'Industry Group'``; when that value
    is missing (``None``, ``NaN`` or any other value pandas reports as NA) the
    row's ``'Primary industry'`` is used instead.  The label is lower-cased,
    ``&`` becomes ``and``, commas are dropped, two known spellings are fixed,
    and any label containing ``mining`` or ``transportation`` is collapsed to
    that keyword.  Rows where both source columns are missing get the label
    ``'none'``.

    Two columns are written to the copy:

    * ``Mixed_sector`` -- the cleaned label.
    * ``main_sector`` -- the cleaned label translated through the table
      below; labels that are not in the table are kept as they are, and
      ``'none'`` becomes ``NaN``.

    Parameters
    ----------
    ds_ : pandas.DataFrame
        Must contain an ``'Industry Group'`` column.  ``'Primary industry'``
        is only read when at least one ``'Industry Group'`` value is missing.

    Returns
    -------
    tuple
        ``(ds, sector_map)``: the augmented copy and the label-to-sector
        dictionary that was applied.  ``ds_`` itself is not modified.
    """
    ds = ds_.copy()

    industry_group = ds['Industry Group']
    labels = industry_group.tolist()
    missing = industry_group.isna().tolist()
    if any(missing):
        # Only touch 'Primary industry' when a fallback is actually required,
        # so frames without that column keep working when nothing is missing.
        primary_industry = ds['Primary industry']
        fallback = primary_industry.tolist()
        fallback_missing = primary_industry.isna().tolist()
        for pos, needs_fallback in enumerate(missing):
            if needs_fallback:
                labels[pos] = fallback[pos]
                missing[pos] = fallback_missing[pos]

    # The column is built as a plain list and assigned in one step.  Calling
    # an in-place method on ``ds[col]`` does not reliably write through to the
    # frame (it is a no-op under pandas Copy-on-Write).
    mixed = [
        'none' if is_missing else _normalize_sector_label(label)
        for label, is_missing in zip(labels, missing)
    ]
    ds['Mixed_sector'] = mixed

    sector_map = {
        'banks diverse financials insurance': 'Financial',
        'services': 'Services',
        'technology hardware and equipment': 'ICT',
        'infrastructure': 'Industrial',
        'manufacturing': 'Industrial',
        'building products': 'Industrial',
        'transportation': 'Industrial',
        'retail': 'Consumer Discretionary',
        'food beverage and agriculture': 'Consumer Staples',
        'materials': 'Material',
        'oil and gas': 'Energy',
        'fossil fuels': 'Energy',
        'real estate': 'Real Estate',
        'consumer durables household and personal products': 'Consumer Discretionary',
        'mineral extraction': 'Material',
        'biotech health care and pharma': 'Health Care',
        'gas utilities': 'Utilities',
        'retailing': 'Consumer Discretionary',
        'trading companies and distributors and commercial services and supplies': 'Industrial',
        'semiconductors and semiconductors equipment': 'ICT',
        'power generation': 'Utilities',
        'banks diverse financials and insurance': 'Financial',
        'chemicals': 'Material',
        'telecommunication services': 'ICT',
        'electrical equipment and machinery': 'Industrial',
        'mining': 'Material',
        'apparel': 'Consumer Discretionary',
        'hospitality': 'Consumer Discretionary',
        'automobiles and components': 'Consumer Discretionary',
        'food and beverage processing': 'Consumer Staples',
        'electric utilities and independent power producers and energy traders (including fossil alternative and nuclear energy)': 'Utilities',
        'food and staples retailing': 'Consumer Staples',
        'aerospace and defense': 'Industrial',
        'pharmaceuticals biotechnology and life sciences': 'Health Care',
        'construction and engineering': 'Industrial',
        'software and services': 'ICT',
        'forest and paper products - forestry timber pulp and paper rubber': 'Material',
        'none': np.nan,
        'health care providers and services and healthcare technology': 'Health Care',
        'media': 'ICT',
        'textiles apparel footwear and luxury goods': 'Consumer Discretionary',
        'containers and packaging': 'Material',
        'healthcare providers and services and healthcare technology': 'Health Care',
        'hotels restaurants and leisure and tourism services': 'Consumer Discretionary',
        'construction materials': 'Material',
        'homebuilding': 'Consumer Discretionary',
        'healthcare equipment and supplies': 'Health Care',
        'professional services': 'Industrial',
        'tires': 'Consumer Discretionary',
        'water utilities': 'Utilities',
        'international bodies': 'Other',
        'corporate tags': 'Other',
        'specialized consumer services': 'Consumer Discretionary',
    }
    # Exact-match translation; unknown labels pass through unchanged.
    ds['main_sector'] = [sector_map.get(label, label) for label in mixed]
    return ds, sector_map

def get_sector_map_step2(ds_):
    """Re-classify rows whose ``main_sector`` is still ``'Services'``.

    Operates on a copy of ``ds_``.  A ``Mixed_sectors`` column is added that
    holds each row's ``main_sector``, except that rows equal to ``'Services'``
    take their ``'Primary activity'`` value instead.  ``main_sector`` is then
    overwritten with ``Mixed_sectors`` after replacing every value that
    appears as a key in the activity table below with its sector; other
    values are left as they are.

    Returns the pair ``(augmented copy, activity table)``.
    """
    frame = ds_.copy()

    resolved = []
    for position, current in enumerate(frame.main_sector):
        if current != 'Services':
            resolved.append(current)
        else:
            # Looked up lazily: only 'Services' rows need the activity column.
            resolved.append(frame['Primary activity'].iloc[position])
    frame['Mixed_sectors'] = resolved

    activity_to_sector = {
        'Banks': 'Financial',
        'IT services': 'ICT',
        'Other professional services': 'Industrial',
        'REIT': 'Financial',
        'Travel services': 'Consumer Discretionary',
        'Insurance': 'Financial',
        'Telecommunications services': 'ICT',
        'Print publishing': 'ICT',
        'Health care services': 'Health Care',
        'Other financial': 'Financial',
        'Asset managers': 'Financial',
        'Engineering services': 'Industrial',
        'Dealers, wholesalers & distributors': 'Consumer Discretionary',
        'Animal products wholesale': 'Consumer Discretionary',
        'Vehicles & machinery rental & leasing': 'Industrial',
        'Software': 'ICT',
        'Commercial services': 'Industrial',
        'Real estate services': 'Real Estate',
        'Transportation support services': 'Industrial',
        'Media': 'ICT',
        'Food & beverage wholesale': 'Consumer Staples',
        'Web-based services': 'ICT',
        'Mining & metals support services': 'Material',
        'Marketing': 'Other',
        'Energy services & equipment': 'Energy',
        'Industrial machinery distribution': 'Industrial',
        'Construction & building materials dealing & distribution': 'Material',
        'Education services': 'Consumer Discretionary',
        'Chemicals wholesale & distribution': 'Material',
        'Transportation equipment wholesale & dealing': 'Industrial',
        'Servers & data centers': 'ICT',
        'Industrial services': 'Industrial',
        'Consumer services': 'Consumer Discretionary',
        'Printing services': 'Industrial',
        'Agricultural products wholesale': 'Consumer Staples',
        'Consumer goods wholesale & rental': 'Consumer Discretionary',
        'Technology hardware wholesale & distribution': 'ICT',
        'Wood & paper products wholesale': 'Material',
        'Pharma & health care supplies wholesale & distribution': 'Health Care',
        'Biofuel supply': 'Material',
    }

    mixed_column = frame['Mixed_sectors']
    frame['main_sector'] = mixed_column.replace(activity_to_sector)
    return frame, activity_to_sector
    
    
    
